I load the NY NOAA dataset from the `p8105.datasets` package. Because the complete dataset is very large, I work with a random sample of 1,000 observations drawn from the rows that have a recorded maximum temperature (`tmax`).

# Load the NY NOAA data shipped with the p8105.datasets package.
data("ny_noaa", package = "p8105.datasets")

# Fix the RNG seed so the random sample (and every plot built from it) is
# identical each time the document is rendered.
set.seed(1)

# Keep only rows with a recorded tmax, then draw 1000 rows at random.
ny_noaa_sample <- ny_noaa |>
  filter(!is.na(tmax)) |>
  slice_sample(n = 1000)

Plots for NY NOAA Data.

Scatterplot of Precipitation vs. Temperature

# Scatter plot of precipitation against maximum temperature.
# tmax is only converted with as.numeric() further down the file, so at this
# point it is presumably still a character column; convert it here so plotly
# draws a continuous numeric x axis instead of a categorical one.
# NOTE(review): values are left in the dataset's raw units -- confirm the
# units against the ny_noaa documentation before adding them to the labels.
ny_noaa_sample |>
  mutate(tmax = as.numeric(tmax)) |>
  plot_ly(x = ~tmax, y = ~prcp, type = "scatter", mode = "markers") |>
  layout(
    title = "Scatter Plot of Temperature vs. Precipitation",
    xaxis = list(title = "Max Temperature (tmax)"),
    yaxis = list(title = "Precipitation (prcp)")
  )
## Warning: Ignoring 25 observations
Line Plot of Max Temperature Over Time
# Line plot of maximum temperature over time.
# plotly joins points in row order, and the rows come from slice_sample() in
# random order, so sort by date first; otherwise the line jumps back and forth
# in time. tmax is converted to numeric so the y axis is continuous (it is
# only converted with as.numeric() further down the file).
ny_noaa_sample |>
  mutate(tmax = as.numeric(tmax)) |>
  arrange(date) |>
  plot_ly(x = ~date, y = ~tmax, type = "scatter", mode = "lines") |>
  layout(
    title = "Line Plot of Max Temperature Over Time",
    xaxis = list(title = "Date"),
    yaxis = list(title = "Max Temperature (tmax)")
  )
Bar Plot of Average Maximum Temperature By Month
# Prepare the sample for the monthly summary: make the temperature columns
# numeric, derive a labelled month factor from the date, and drop rows whose
# tmax could not be parsed as a number.
ny_noaa_sample <- ny_noaa_sample |>
  mutate(
    tmax = as.numeric(tmax),
    tmin = as.numeric(tmin),
    month = month(date, label = TRUE)
  ) |>
  filter(!is.na(tmax))

# Average maximum temperature per calendar month.
# NOTE(review): the ny_noaa documentation lists tmax in tenths of degrees C,
# so the mean is divided by 10 to match the degrees-C axis label below --
# confirm against the dataset help page.
ny_noaa_summary <- ny_noaa_sample |>
  group_by(month) |>
  summarize(avg_tmax = mean(tmax, na.rm = TRUE) / 10)

# Bar chart of the monthly averages; geom_col() is the direct equivalent of
# geom_bar(stat = "identity").
ggplot(ny_noaa_summary, aes(x = month, y = avg_tmax)) +
  geom_col(fill = "skyblue") +
  labs(
    title = "Average Maximum Temperature by Month",
    x = "Month",
    y = "Average Max Temperature (°C)"
  ) +
  theme_minimal()